From: akw27@arcadians.cl.cam.ac.uk Date: Thu, 3 Mar 2005 21:00:54 +0000 (+0000) Subject: bitkeeper revision 1.1236.7.1 (42277b06gMBo3oqaq35om--uwNImNg) X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~17857^2~76^2 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks://%22Dat/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22Dat?a=commitdiff_plain;h=c63b6d985bcdcf45fe682b8aefbe537479ace5de;p=xen.git bitkeeper revision 1.1236.7.1 (42277b06gMBo3oqaq35om--uwNImNg) Initial checkin of parallax code. See README-PARALLAX for details. Signed-off-by: andrew.warfield@cl.cam.ac.uk --- diff --git a/.rootkeys b/.rootkeys index 3a0d744343..fbe6ef5fb4 100644 --- a/.rootkeys +++ b/.rootkeys @@ -317,6 +317,7 @@ 40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rules.mk 4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile 4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README +42277b02mYXxgijE7MFeUe9d8eldMw tools/blktap/README-PARALLAX 4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c 4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c 4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h @@ -335,10 +336,26 @@ 42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h 42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c 42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h +42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c +42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h 42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile 42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c 42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c 42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h +42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c +42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c +42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h +42277b03U_wLHL-alMA0bfxGlqldXg tools/blktap/snaplog.c +42277b04Ryya-z662BEx8HnxNN0dGQ tools/blktap/snaplog.h +42277b04LxFjptgZ75Z98DUAso4Prg tools/blktap/vdi.c 
+42277b04tt5QkIvs8She8CQqH5kwpg tools/blktap/vdi.h +42277b04zMAhB0_946sHQ_H2vwnt0Q tools/blktap/vdi_create.c +42277b04xB_iUmiSm6nKcy8OV8bckA tools/blktap/vdi_fill.c +42277b045CJGD_rKH-ZT_-0X4knhWA tools/blktap/vdi_list.c +42277b043ZKx0NJSbcgptQctQ5rerg tools/blktap/vdi_snap.c +42277b043Fjy5-H7LyBtUPyDlZFo6A tools/blktap/vdi_snap_list.c +42277b04vhqD6Lq3WmGbaESoAAKdhw tools/blktap/vdi_tree.c +42277b047H8fTVyUf75BWAjh6Zpsqg tools/blktap/vdi_validate.c 4124b307nRyK3dhn1hAsvrY76NuV3g tools/check/Makefile 4124b307vHLUWbfpemVefmaWDcdfag tools/check/README 4124b307jt7T3CHysgl9LijNHSe1tA tools/check/check_brctl diff --git a/tools/blktap/Makefile b/tools/blktap/Makefile index 389095e68c..50d77b905b 100644 --- a/tools/blktap/Makefile +++ b/tools/blktap/Makefile @@ -7,11 +7,34 @@ CC = gcc XEN_ROOT = ../.. include $(XEN_ROOT)/tools/Rules.mk +BLKTAP_INSTALL_DIR = /usr/sbin + +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 +INSTALL_DIR = $(INSTALL) -d -m0755 + INCLUDES += SRCS := SRCS += blktaplib.c +PLX_SRCS := +PLX_SRCS += vdi.c +PLX_SRCS += radix.c +PLX_SRCS += blockstore.c +PLX_SRCS += snaplog.c +VDI_SRCS := $(PLX_SRCS) +PLX_SRCS += parallax.c + +VDI_TOOLS := +VDI_TOOLS += vdi_create +VDI_TOOLS += vdi_list +VDI_TOOLS += vdi_snap +VDI_TOOLS += vdi_snap_list +VDI_TOOLS += vdi_fill +VDI_TOOLS += vdi_tree +VDI_TOOLS += vdi_validate + CFLAGS += -Wall CFLAGS += -Werror CFLAGS += -Wno-unused @@ -30,7 +53,7 @@ OBJS = $(patsubst %.c,%.o,$(SRCS)) LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) -all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio +all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(VDI_TOOLS) parallax $(MAKE) $(LIB) LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse) @@ -45,16 +68,14 @@ mk-symlinks: ln -sf ../../$(LINUX_ROOT)/include/asm-xen/linux-public/*.h . 
) install: all - mkdir -p $(prefix)/usr/lib - mkdir -p $(prefix)/usr/include - install -m0755 $(LIB) $(prefix)/usr/lib - ln -sf libblktap.so.$(MAJOR).$(MINOR) \ - $(prefix)/usr/lib/libblktap.so.$(MAJOR) - ln -sf libblktap.so.$(MAJOR) $(prefix)/usr/lib/libblktap.so - install -m0644 blktaplib.h $(prefix)/usr/include + $(INSTALL_DIR) -p $(DESTDIR)/usr/lib + $(INSTALL_DIR) -p $(DESTDIR)/usr/include + $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/lib + $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include + $(INSTALL_PROG) blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(DESTDIR)/$(BLKTAP_INSTALL_DIR) clean: - rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio + rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio $(VDI_TOOLS) parallax rpm: all rm -rf staging @@ -93,6 +114,38 @@ blkcowgnbd: $(LIB) blkgnbd.c blkcowlib.c blkgnbdlib.c blkaio: $(LIB) blkaio.c blkaiolib.c $(CC) $(CFLAGS) -o blkaio -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkaio.c blkaiolib.c -laio -lpthread +parallax: $(LIB) $(PLX_SRCS) + $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. 
-lblktap $(PLX_SRCS) libgnbd/libgnbd.a + +vdi_test: $(LIB) $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_test -DVDI_STANDALONE $(VDI_SRCS) + +vdi_list: $(LIB) vdi_list.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c $(VDI_SRCS) + +vdi_create: $(LIB) vdi_create.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_create vdi_create.c $(VDI_SRCS) + +vdi_snap: $(LIB) vdi_snap.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_snap vdi_snap.c $(VDI_SRCS) + +vdi_snap_list: $(LIB) vdi_snap_list.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_snap_list vdi_snap_list.c $(VDI_SRCS) + +vdi_tree: $(LIB) vdi_tree.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_tree vdi_tree.c $(VDI_SRCS) + +vdi_fill: $(LIB) vdi_fill.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_fill vdi_fill.c $(VDI_SRCS) + +vdi_validate: $(LIB) vdi_validate.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o vdi_validate vdi_validate.c $(VDI_SRCS) + + +rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS) + + .PHONY: TAGS clean install mk-symlinks rpm TAGS: etags -t $(SRCS) *.h diff --git a/tools/blktap/README-PARALLAX b/tools/blktap/README-PARALLAX new file mode 100644 index 0000000000..ace05fdb41 --- /dev/null +++ b/tools/blktap/README-PARALLAX @@ -0,0 +1,177 @@ +Parallax Quick Overview +March 3, 2005 + +This is intended to provide a quick set of instructions to let you +guys play with the current parallax source. In it's current form, the +code will let you run an arbitrary number of VMs off of a single disk +image, doing copy-on-write as they make updates. Each domain is +assigned a virtual disk image (VDI), which may be based on a snapshot +of an existing image. All of the VDI and snapshot management should +currently work. + +The current implementation uses a single file as a blockstore for +_everything_ this will soon be replaced by the fancier backend code +and the local cache. 
As it stands, Parallax will create +"blockstore.dat" in the directory that you run it from, and use +largefile support to make this grow to unfathomable girth. So, you +probably want to run the daemon off of a local disk, with a lot of +free space. + +Here's how to get going: + +0. Setup: +--------- + +Pick a local directory on a disk with lots of room. You should be +running from a privileged domain (e.g. dom0) with the blktap driver +configured in and the block backend NOT configured in. + +For convenience (for the moment) copy all of the vdi tools (vdi_*) and +the parallax daemon from tools/blktap into this directory. + +1. Populate the blockstore: +--------------------------- + +First you need to put at least one image into the blockstore. You +will need a disk image, either as a file or local partition. My +general approach has been to + +(a) make a really big sparse file with + + dd if=/dev/zero of=./image bs=4K count=1 seek=[big value] + +(b) put a filesystem into it + + mkfs.ext3 ./image + +(c) mount it using loopback + + mkdir ./mnt + mount -o loop ./image ./mnt + +(d) cd into it and untar one of the image files from srg-roots. + + cd mnt + tar ... + +NOTE: Beware if your system is FC3. Its mkfs is not compatible with old +versions of Fedora, and so you don't have much choice but to install +further FC3 images if you have used the FC3 version of mkfs. + +(e) unmount the image + + cd .. + umount mnt + +(f) now, create a new VDI to hold the image + + ./vdi_create "My new FC3 VDI" + +(g) get the id of the new VDI. + + ./vdi_list + + | 0 My new FC3 VDI + +(0 is the VDI id... create a few more if you want.) + +(h) hoover your image into the new VDI. + + ./vdi_fill 0 ./image + +This will pull the entire image into the blockstore and set up a +mapping tree for it for VDI 0. Passing a device (e.g. /dev/sda3) +should also work, but vdi_fill has NO notion of sparseness yet, so you +are going to pump a block into the store for each block you read. 
+ +vdi_fill will count up until it is done, and you should be ready to +go. If you want to be thorough, you can use vdi_validate to test the VDI +against the original image. + +2. Create some extra VDIs +------------------------- + +A VDI is actually a list of snapshots, and each snapshot is a full +image of mappings. So, to preserve an immutable copy of a current +VDI, do this: + +(a) Snapshot your new VDI. + + ./vdi_snap 0 + +Snapshotting writes the current radix root to the VDI's snapshot log, +and assigns it a new writable root. + +(b) look at the VDI's snapshot log. + + ./vdi_snap_list 0 + + | 16 0 Thu Mar 3 19:27:48 2005 565111 31 + +The first two columns constitute a snapshot id and represent the +(block, offset) of the snapshot record. The date tells you when the +snapshot was made, and 31 is the radix root node of the snapshot. + +(c) Create a new VDI, based on that snapshot, and look at the list. + + ./vdi_create "FC3 - Copy 1" 16 0 + ./vdi_list + + | 0 My new FC3 VDI + | 1 FC3 - Copy 1 + +NOTE: If you have Graphviz installed on your system, you can use +vdi_tree to generate a PostScript rendering of your current set of VDIs and +snapshots. + + +Create as many VDIs as you need for the VMs that you want to run. + +3. Boot some VMs: +----------------- + +Parallax currently uses a hack in xend to pass the VDI id, so you need to +modify the disk line of the VM config that is going to mount it. + +(a) set up your vm config, by using the following disk line: + + disk = ['parallax:1,sda1,w,0' ] + +This example uses VDI 1 (from vdi_list above), presents it as sda1 +(writable), and uses dom 0 as the backend. If you were running the +daemon (and tap driver) in some domain other than 0, you would change +this last parameter. + +NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:". + +(b) Run parallax in the backend domain. + + ./parallax + +(c) create your new domain. + + xm create ... 
+ +--- + +That's pretty much all there is to it at the moment. Hope this is +clear enough to get you going. Now, a few serious caveats that will +be sorted out in the almost immediate future: + +WARNINGS: +--------- + +1. There is NO locking in the VDI tools at the moment, so I'd avoid +running them in parallel, or more importantly, running them while the +daemon is running. + +2. I doubt that xend will be very happy about restarting if you have +parallax-using domains. So if it dies while there are active parallax +doms, you may need to reboot. + +3. I've turned off write-in-place. So at the moment, EVERY block +write is a log append on the blockstore. I've been having some probs +with the radix tree's marking of writable blocks after snapshots and +will sort this out very soon. + + diff --git a/tools/blktap/blktaplib.h b/tools/blktap/blktaplib.h index 7b38f565fc..5225dcc270 100644 --- a/tools/blktap/blktaplib.h +++ b/tools/blktap/blktaplib.h @@ -8,6 +8,9 @@ #ifndef __BLKTAPLIB_H__ #define __BLKTAPLIB_H__ +#ifndef __SHORT_INT_TYPES__ +#define __SHORT_INT_TYPES__ + #include typedef uint8_t u8; @@ -18,6 +21,8 @@ typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; + +#endif /* __SHORT_INT_TYPES__ */ #if defined(__i386__) #define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) diff --git a/tools/blktap/blockstore.c b/tools/blktap/blockstore.c new file mode 100644 index 0000000000..179fcdc3c3 --- /dev/null +++ b/tools/blktap/blockstore.c @@ -0,0 +1,135 @@ +/************************************************************************** + * + * blockstore.c + * + * Simple block store interface + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "blockstore.h" + +static int block_fp = -1; + +/** + * readblock: read a block from disk + * @id: block id to read + * + * @return: pointer to block, NULL on error + */ + +void *readblock(u64 id) { + void *block; + if (lseek64(block_fp, 
((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { + printf ("%Ld\n", (id - 1) * BLOCK_SIZE); + perror("readblock lseek"); + return NULL; + } + if ((block = malloc(BLOCK_SIZE)) == NULL) { + perror("readblock malloc"); + return NULL; + } + if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { + perror("readblock read"); + free(block); + return NULL; + } + return block; +} + +/** + * writeblock: write an existing block to disk + * @id: block id + * @block: pointer to block + * + * @return: zero on success, -1 on failure + */ +int writeblock(u64 id, void *block) { + if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { + perror("writeblock lseek"); + return -1; + } + if (write(block_fp, block, BLOCK_SIZE) < 0) { + perror("writeblock write"); + return -1; + } + return 0; +} + +/** + * allocblock: write a new block to disk + * @block: pointer to block + * + * @return: new id of block on disk + */ +static u64 lastblock = 0; + +u64 allocblock(void *block) { + u64 lb; + off64_t pos = lseek64(block_fp, 0, SEEK_END); + if (pos == (off64_t)-1) { + perror("allocblock lseek"); + return 0; + } + if (pos % BLOCK_SIZE != 0) { + fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE); + return 0; + } + if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { + perror("allocblock write"); + return 0; + } + lb = pos / BLOCK_SIZE + 1; + + if (lb <= lastblock) + printf("[*** %Ld alredy allocated! 
***]\n", lb); + + lastblock = lb; + return lb; +} + + +/** + * newblock: get a new in-memory block set to zeros + * + * @return: pointer to new block, NULL on error + */ +void *newblock() { + void *block = malloc(BLOCK_SIZE); + if (block == NULL) { + perror("newblock"); + return NULL; + } + memset(block, 0, BLOCK_SIZE); + return block; +} + + +/** + * freeblock: unallocate an in-memory block + * @id: block id (zero if this is only in-memory) + * @block: block to be freed + */ +void freeblock(void *block) { + if (block != NULL) + free(block); +} + + +int __init_blockstore(void) +{ + block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); + + if (block_fp < 0) { + perror("open"); + return -1; + } + + return 0; +} diff --git a/tools/blktap/blockstore.h b/tools/blktap/blockstore.h new file mode 100644 index 0000000000..3ccd7ab65d --- /dev/null +++ b/tools/blktap/blockstore.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * blockstore.h + * + * Simple block store interface + * + */ + +#ifndef __BLOCKSTORE_H__ +#define __BLOCKSTORE_H__ + +#ifndef __SHORT_INT_TYPES__ +#define __SHORT_INT_TYPES__ + +#include + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +#endif /* __SHORT_INT_TYPES__ */ + +#define BLOCK_SIZE 4096 +#define BLOCK_SHIFT 12 +#define BLOCK_MASK 0xfffffffffffff000LL + +/* XXX SMH: where is the below supposed to be defined???? 
*/ +#ifndef SECTOR_SHIFT +#define SECTOR_SHIFT 9 +#endif + + +extern void *newblock(); +extern void *readblock(u64 id); +extern u64 allocblock(void *block); +extern int writeblock(u64 id, void *block); +extern void freeblock(void *block); +extern int __init_blockstore(void); + +#endif /* __BLOCKSTORE_H__ */ diff --git a/tools/blktap/parallax.c b/tools/blktap/parallax.c new file mode 100644 index 0000000000..24188e692b --- /dev/null +++ b/tools/blktap/parallax.c @@ -0,0 +1,498 @@ +/************************************************************************** + * + * parallax.c + * + * The Parallax Storage Server + * + */ + + +#include +#include +#include +#include "blktaplib.h" +#include "blockstore.h" +#include "vdi.h" + +#define PARALLAX_DEV 61440 + +#if 1 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +/* ------[ session records ]----------------------------------------------- */ + +#define BLKIF_HASHSZ 1024 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) + +#define VDI_HASHSZ 16 +#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1)) + +typedef struct blkif { + domid_t domid; + unsigned int handle; + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + vdi_t *vdi_hash[VDI_HASHSZ]; + struct blkif *hash_next; +} blkif_t; + +static blkif_t *blkif_hash[BLKIF_HASHSZ]; + +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) +{ + if ( handle != 0 ) + printf("blktap/parallax don't currently support non-0 dev handles!\n"); + + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif != NULL) && + ((blkif->domid != domid) || (blkif->handle != handle)) ) + blkif = blkif->hash_next; + return blkif; +} + +vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device) +{ + vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)]; + + while ((vdi != NULL) && (vdi->vdevice != device)) + vdi = vdi->next; + + return vdi; +} + +/* ------[ control message handling 
]-------------------------------------- */ + +void blkif_create(blkif_be_create_t *create) +{ + domid_t domid = create->domid; + unsigned int handle = create->blkif_handle; + blkif_t **pblkif, *blkif; + + DPRINTF("parallax (blkif_create): create is %p\n", create); + + if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL ) + { + DPRINTF("Could not create blkif: out of memory\n"); + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + return; + } + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + blkif->handle = handle; + blkif->status = DISCONNECTED; +/* + spin_lock_init(&blkif->vbd_lock); + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 0); +*/ + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( *pblkif != NULL ) + { + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) + { + DPRINTF("Could not create blkif: already exists\n"); + create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; + free(blkif); + return; + } + pblkif = &(*pblkif)->hash_next; + } + + blkif->hash_next = *pblkif; + *pblkif = blkif; + + DPRINTF("Successfully created blkif\n"); + create->status = BLKIF_BE_STATUS_OKAY; +} + +void blkif_destroy(blkif_be_destroy_t *destroy) +{ + domid_t domid = destroy->domid; + unsigned int handle = destroy->blkif_handle; + blkif_t **pblkif, *blkif; + + DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); + + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif = *pblkif) != NULL ) + { + if ( (blkif->domid == domid) && (blkif->handle == handle) ) + { + if ( blkif->status != DISCONNECTED ) + goto still_connected; + goto destroy; + } + pblkif = &blkif->hash_next; + } + + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + + still_connected: + destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; + return; + + destroy: + *pblkif = blkif->hash_next; + /* destroy_all_vbds(blkif); */ + free(blkif); + destroy->status = BLKIF_BE_STATUS_OKAY; +} + +void vbd_grow(blkif_be_vbd_grow_t 
*grow) +{ + blkif_t *blkif; + vdi_t *vdi, **vdip; + blkif_vdev_t vdevice = grow->vdevice; + + DPRINTF("parallax (vbd_grow): grow=%p\n", grow); + + blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle); + if ( blkif == NULL ) + { + DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", + grow->domid, grow->blkif_handle); + grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + } + + /* VDI identifier is in grow->extent.sector_start */ + DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", + grow->extent.sector_start); + + vdi = vdi_get(grow->extent.sector_start); + if (vdi == NULL) + { + printf("parallax (vbd_grow): VDI %llx not found.\n", + grow->extent.sector_start); + grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; + return; + } + + vdi->next = NULL; + vdi->vdevice = vdevice; + vdip = &blkif->vdi_hash[VDI_HASH(vdevice)]; + while (*vdip != NULL) + vdip = &(*vdip)->next; + *vdip = vdi; + + DPRINTF("vbd_grow: happy return!\n"); + grow->status = BLKIF_BE_STATUS_OKAY; +} + +int parallax_control(control_msg_t *msg) +{ + domid_t domid; + int ret; + + DPRINTF("parallax_control: msg is %p\n", msg); + + if (msg->type != CMSG_BLKIF_BE) + { + printf("Unexpected control message (%d)\n", msg->type); + return 0; + } + + switch(msg->subtype) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_be_create_t) ) + goto parse_error; + blkif_create((blkif_be_create_t *)msg->msg); + break; + + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_be_destroy_t) ) + goto parse_error; + blkif_destroy((blkif_be_destroy_t *)msg->msg); + break; + + case CMSG_BLKIF_BE_VBD_GROW: + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) + goto parse_error; + vbd_grow((blkif_be_vbd_grow_t *)msg->msg); + break; + } + return 0; +parse_error: + printf("Bad control message!\n"); + return 0; + +} + +int parallax_probe(blkif_request_t *req, blkif_t *blkif) +{ + blkif_response_t *rsp; + vdisk_t *img_info; + vdi_t *vdi; + int i, nr_vdis = 0; + + 
DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); + + /* We expect one buffer only. */ + if ( req->nr_segments != 1 ) + goto err; + + /* Make sure the buffer is page-sized. */ + if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || + (blkif_last_sect (req->frame_and_sects[0]) != 7) ) + goto err; + + /* fill the list of devices */ + for (i=0; ivdi_hash[i]; + while (vdi) { + img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); + img_info[nr_vdis].device = vdi->vdevice; + img_info[nr_vdis].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; + /* The -2 here accounts for the LSB in the radix tree */ + img_info[nr_vdis].capacity = + ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT); + nr_vdis++; + vdi = vdi->next; + } + } + + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_PROBE; + rsp->status = nr_vdis; /* number of disks */ + + DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis); + return BLKTAP_RESPOND; +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_PROBE; + rsp->status = BLKIF_RSP_ERROR; + + DPRINTF("parallax_probe: send error response\n"); + return BLKTAP_RESPOND; +} + +int parallax_read(blkif_request_t *req, blkif_t *blkif) +{ + blkif_response_t *rsp; + unsigned long size, offset, start; + u64 sector; + u64 vblock, gblock; + vdi_t *vdi; + int i; + char *dpage, *spage; + + vdi = blkif_get_vdi(blkif, req->device); + + if ( vdi == NULL ) + goto err; + + for (i = 0; i < req->nr_segments; i++) { + + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + + /* Round the requested segment to a block address. */ + + sector = req->sector_number + (8*i); + vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; + + /* Get that block from the store. */ + + gblock = vdi_lookup_block(vdi, vblock, NULL); + + /* Calculate read size and offset within the read block. 
*/ + + offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE; + size = ( blkif_last_sect (req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1 + ) << SECTOR_SHIFT; + start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + + /* If the block does not exist in the store, return zeros. */ + /* Otherwise, copy that region to the guest page. */ + + DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), " + "vblock %llx, gblock %llx, " + "size %lx\n", + sector, blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + vblock, gblock, size); + + if ( gblock == 0 ) { + + memset(dpage + start, '\0', size); + + } else { + + spage = readblock(gblock); + + if (spage == NULL) { + printf("Error reading gblock from store: %Ld\n", gblock); + goto err; + } + + memcpy(dpage + start, spage + offset, size); + + freeblock(spage); + } + + } + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_WRITE; + rsp->status = BLKIF_RSP_OKAY; + + return BLKTAP_RESPOND; +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_WRITE; + rsp->status = BLKIF_RSP_ERROR; + + return BLKTAP_RESPOND; +} + +int parallax_write(blkif_request_t *req, blkif_t *blkif) +{ + blkif_response_t *rsp; + u64 sector; + int i, writable = 0; + u64 vblock, gblock; + char *spage; + unsigned long size, offset, start; + vdi_t *vdi; + + vdi = blkif_get_vdi(blkif, req->device); + + if ( vdi == NULL ) + goto err; + + for (i = 0; i < req->nr_segments; i++) { + + spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + + /* Round the requested segment to a block address. */ + + sector = req->sector_number + (8*i); + vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; + + /* Get that block from the store. */ + + gblock = vdi_lookup_block(vdi, vblock, &writable); + + /* Calculate read size and offset within the read block. 
*/ + + offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE; + size = ( blkif_last_sect (req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1 + ) << SECTOR_SHIFT; + start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; +/* +if (( gblock != 0 ) && ( writable == 0 )) printf("*"); +*/ + DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld), " + "vblock %llx, gblock %llx, " + "size %lx\n", + sector, blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + vblock, gblock, size); + + /* XXX: For now we just freak out if they try to write a */ + /* non block-sized, block-aligned page. */ + + if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) { + printf("]\n] STRANGE WRITE!\n]\n"); + goto err; + } +/* Disable write-in-place till radix is sorted out. + if (( gblock == 0 ) || ( writable == 0 )) { +*/ + gblock = allocblock(spage); + vdi_update_block(vdi, vblock, gblock); +#if 0 + } else { + + /* write-in-place, no need to change mappings. 
*/ + writeblock(gblock, spage); + + } +#endif + } + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_WRITE; + rsp->status = BLKIF_RSP_OKAY; + + return BLKTAP_RESPOND; +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_WRITE; + rsp->status = BLKIF_RSP_ERROR; + + return BLKTAP_RESPOND; +} + +int parallax_request(blkif_request_t *req) +{ + blkif_response_t *rsp; + domid_t dom = ID_TO_DOM(req->id); + blkif_t *blkif = blkif_find_by_handle(dom, 0); + + //DPRINTF("parallax_request: req=%p, dom=%d, blkif=%p\n", req, dom, blkif); + + if (blkif == NULL) + goto err; + + if ( req->operation == BLKIF_OP_PROBE ) { + + return parallax_probe(req, blkif); + + } else if ( req->operation == BLKIF_OP_READ ) { + + return parallax_read(req, blkif); + + } else if ( req->operation == BLKIF_OP_WRITE ) { + + return parallax_write(req, blkif); + + } else { + /* Unknown operation */ + goto err; + } + +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; +} + +void __init_parallax(void) +{ + memset(blkif_hash, 0, sizeof(blkif_hash)); +} + + +int main(int argc, char *argv[]) +{ + DPRINTF("parallax: starting.\n"); + __init_blockstore(); + DPRINTF("parallax: initialized blockstore...\n"); + __init_vdi(); + DPRINTF("parallax: initialized vdi registry etc...\n"); + __init_parallax(); + DPRINTF("parallax: initialized local stuff..\n"); + + blktap_register_ctrl_hook("parallax_control", parallax_control); + blktap_register_request_hook("parallax_request", parallax_request); + DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); + blktap_listen(); + + return 0; +} diff --git a/tools/blktap/radix.c b/tools/blktap/radix.c new file mode 100644 index 0000000000..1174871010 --- /dev/null +++ b/tools/blktap/radix.c @@ -0,0 +1,362 @@ +/* + * Radix tree for mapping (up to) 63-bit virtual block IDs to + * 63-bit global block 
IDs + * + * Pointers within the tree set aside the least significant bit to indicate + * whther or not the target block is writable from this node. + * + * The block with ID 0 is assumed to be an empty block of all zeros + */ + +#include +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" + +#define RADIX_TREE_MAP_SHIFT 9 +#define RADIX_TREE_MAP_MASK 0x1ff +#define RADIX_TREE_MAP_ENTRIES 512 + +/* +#define DEBUG +*/ + +#define ZERO 0LL +#define ONE 1LL +#define ONEMASK 0xffffffffffffffeLL + + +typedef u64 *radix_tree_node; + +/* + * block device interface and other helper functions + * with these functions, block id is just a 63-bit number, with + * no special consideration for the LSB + */ +radix_tree_node cloneblock(radix_tree_node block); + +/* + * main api + * with these functions, the LSB of root always indicates + * whether or not the block is writable, including the return + * values of update and snapshot + */ +u64 lookup(int height, u64 root, u64 key); +u64 update(int height, u64 root, u64 key, u64 val); +u64 snapshot(u64 root); + +/** + * cloneblock: clone an existing block in memory + * @block: the old block + * + * @return: new block, with LSB cleared for every entry + */ +radix_tree_node cloneblock(radix_tree_node block) { + radix_tree_node node = (radix_tree_node) malloc(BLOCK_SIZE); + int i; + if (node == NULL) { + perror("cloneblock malloc"); + return NULL; + } + for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) + node[i] = block[i] & ONEMASK; + return node; +} + +/** + * lookup: find a value given a key + * @height: height in bits of the radix tree + * @root: root node id, with set LSB indicating writable node + * @key: key to lookup + * + * @return: value on success, zero on error + */ +u64 lookup(int height, u64 root, u64 key) { + radix_tree_node node; + + assert(key >> height == 0); + + /* the root block may be smaller to ensure all leaves are full */ + height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * 
RADIX_TREE_MAP_SHIFT; + + /* now carve off equal sized chunks at each step */ + for (;;) { + u64 oldroot; + +#ifdef DEBUG + printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", height, root, + (int) ((key >> height) & RADIX_TREE_MAP_MASK), + (iswritable(root) ? "" : " (readonly)")); +#endif + + if (getid(root) == ZERO) + return ZERO; + + oldroot = root; + node = (radix_tree_node) readblock(getid(root)); + if (node == NULL) + return ZERO; + + root = node[(key >> height) & RADIX_TREE_MAP_MASK]; + freeblock(node); + + if (height == 0) + return root; + + height -= RADIX_TREE_MAP_SHIFT; + } + + return ZERO; +} + +/* + * update: set a radix tree entry, doing copy-on-write as necessary + * @height: height in bits of the radix tree + * @root: root node id, with set LSB indicating writable node + * @key: key to set + * @val: value to set, s.t. radix(key)=val + * + * @returns: (possibly new) root id on success (with LSB=1), 0 on failure + */ +u64 update(int height, u64 root, u64 key, u64 val) { + int offset; + u64 child; + radix_tree_node node; + + /* base case--return val */ + if (height == 0) + return val; + + /* the root block may be smaller to ensure all leaves are full */ + height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT; + offset = (key >> height) & RADIX_TREE_MAP_MASK; + +#ifdef DEBUG + printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root, + offset, (iswritable(root)?"":" (clone)")); +#endif + + /* load a block, or create a new one */ + if (root == ZERO) { + node = (radix_tree_node) newblock(); + } else { + node = (radix_tree_node) readblock(getid(root)); + + if (!iswritable(root)) { + /* need to clone this node */ + radix_tree_node oldnode = node; + node = cloneblock(node); + freeblock(oldnode); + root = ZERO; + } + } + + if (node == NULL) { +#ifdef DEBUG + printf("update: node is null!\n"); +#endif + return ZERO; + } + + child = update(height, node[offset], key, val); + + if (child == ZERO) { + freeblock(node); + return ZERO; + } 
else if (child == node[offset]) { + /* no change, so we already owned the child */ + assert(iswritable(root)); + + freeblock(node); + return root; + } + + node[offset] = child; + + /* new/cloned blocks need to be saved */ + if (root == ZERO) { + /* mark this as an owned block */ + root = allocblock(node); + if (root) + root = writable(root); + } else if (writeblock(getid(root), node) < 0) { + freeblock(node); + return ZERO; + } + + freeblock(node); + return root; +} + +/** + * snapshot: create a snapshot + * @root: old root node + * + * @return: new root node, 0 on error + */ +u64 snapshot(u64 root) { + radix_tree_node node, newnode; + + if ((node = readblock(getid(root))) == NULL) + return ZERO; + + newnode = cloneblock(node); + freeblock(node); + if (newnode == NULL) + return ZERO; + + root = allocblock(newnode); + freeblock(newnode); + + if (root == ZERO) + return ZERO; + else + return writable(root); +} + +/** + * print_root: write the tree below a root as graphviz "dot" text + * @root: root node id + * @height: height in bits of the tree below @root + * @dot_f: stream to write to; when NULL, "radix.dot" is opened here and + * the graph preamble is written to it + */ +void print_root(u64 root, int height, u64 val, FILE *dot_f) +{ + FILE *f = dot_f; /* was left uninitialised when the caller passed a stream */ + int i; + radix_tree_node node; + char *style[2] = { "", "style=bold,color=blue," }; + + if (dot_f == NULL) { + f = fopen("radix.dot", "w"); + if (f == NULL) { + perror("print_root: open"); + return; + } + + /* write graph preamble */ + fprintf(f, "digraph G {\n"); + + /* add a node for this root. 
*/ + fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n", + getid(root), style[iswritable(root)], getid(root)); + } + + /* base case--return val */ + if (height == 0) { + /* add a node and edge for each child root */ + node = (radix_tree_node) readblock(getid(root)); + if (node == NULL) + return; + + for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) { + if (node[i] != 0) { + fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n", + getid(node[i]), style[iswritable(node[i])], + getid(node[i])); + fprintf(f, " n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), + getid(node[i]), i); + } + } + return; + } + + /* the root block may be smaller to ensure all leaves are full */ + height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT; + + if (getid(root) == ZERO) + return; + + node = (radix_tree_node) readblock(getid(root)); + if (node == NULL) + return; + + /* add a node and edge for each child root */ + for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) + if (node[i] != 0) { + fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n", + getid(node[i]), style[iswritable(node[i])], + getid(node[i])); + print_root(node[i], height-RADIX_TREE_MAP_SHIFT, + val + (((u64)i)< n%Ld [label=\"%d\"]\n", getid(root), + getid(node[i]), i); + } + + /* + + root = node[(key >> height) & RADIX_TREE_MAP_MASK]; + freeblock(state, getid(oldroot), node); + + if (height == 0) + return root; + + height -= RADIX_TREE_MAP_SHIFT; + */ + //} + + + /* write graph postamble */ + if (dot_f == NULL) { + fprintf(f, "}\n"); + fclose(f); + } +} + +#ifdef RADIX_STANDALONE + +int main(int argc, char **argv) { + u64 key = ZERO, val = ZERO; + u64 root = writable(ONE); + char buff[4096]; + + __init_blockstore(); + + memset(buff, 0, 4096); + /*fp = open("radix.dat", O_RDWR | O_CREAT, 0644); + + if (fp < 3) { + perror("open"); + return -1; + } + if (lseek(fp, 0, SEEK_END) == 0) { + write(fp, buff, 4096); + }*/ + + printf("Recognized commands:\n" + "Note: the LSB of a node number indicates if it is writable\n" + " root set root to 
\n" + " snapshot take a snapshot of the root\n" + " set set key=val\n" + " get query key\n" + " quit\n" + "\nroot = %Ld\n", root); + for (;;) { + print_root(root, 34, 0, NULL); + system("dot radix.dot -Tps -o radix.ps"); + + printf("> "); + fflush(stdout); + fgets(buff, 1024, stdin); + if (feof(stdin)) + break; + if (sscanf(buff, " root %Ld", &root) == 1) { + printf("root set to %Ld\n", root); + } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) { + root = update(34, root, key, val); + printf("root = %Ld\n", root); + } else if (sscanf(buff, " get %Ld", &key) == 1) { + val = lookup(34, root, key, NULL); + printf("value = %Ld\n", val); + } else if (!strcmp(buff, "quit\n")) { + break; + } else if (!strcmp(buff, "snapshot\n")) { + root = snapshot(root); + printf("new root = %Ld\n", root); + } else if (sscanf(buff, " pr %Ld", &root) == 1) { + print_root(root, 34, 0, NULL); + } else { + printf("command not recognized\n"); + } + } + return 0; +} + +#endif diff --git a/tools/blktap/radix.h b/tools/blktap/radix.h new file mode 100644 index 0000000000..8cca98f7ef --- /dev/null +++ b/tools/blktap/radix.h @@ -0,0 +1,31 @@ +/* + * Radix tree for mapping (up to) 63-bit virtual block IDs to + * 63-bit global block IDs + * + * Pointers within the tree set aside the least significant bit to indicate + * whther or not the target block is writable from this node. + * + * The block with ID 0 is assumed to be an empty block of all zeros + */ + +#ifndef __RADIX_H__ +#define __RADIX_H__ + +/* I don't really like exposing these, but... 
*/ +#define getid(x) (((x)>>1)&0x7fffffffffffffffLL) +#define putid(x) ((x)<<1) +#define writable(x) (((x)<<1)|1LL) +#define iswritable(x) ((x)&1LL) + +/* + * main api + * with these functions, the LSB of root always indicates + * whether or not the block is writable, including the return + * values of update and snapshot + */ +u64 lookup(int height, u64 root, u64 key); +u64 update(int height, u64 root, u64 key, u64 val); +u64 snapshot(u64 root); +int isprivate(int height, u64 root, u64 key); + +#endif /* __RADIX_H__ */ diff --git a/tools/blktap/snaplog.c b/tools/blktap/snaplog.c new file mode 100644 index 0000000000..0647f1757a --- /dev/null +++ b/tools/blktap/snaplog.c @@ -0,0 +1,173 @@ +/************************************************************************** + * + * snaplog.c + * + * Snapshot log on-disk data structure. + * + */ + + /* VDI histories are made from chains of snapshot logs. These logs record + * the (radix) root and timestamp of individual snapshots. + * + * creation of a new VDI involves 'forking' a snapshot log, by creating a + * new, empty log (in a new VDI) and parenting it off of a record in an + * existing snapshot log. + * + * snapshot log blocks have at most one writer. 
+ */ + +#include +#include +#include +#include "blockstore.h" +#include "snaplog.h" + + + +/* Read @block and hand it back only if its header carries SNAP_MAGIC; + * returns NULL otherwise. */ +snap_block_t *snap_get_block(u64 block) +{ + snap_block_t *blk = (snap_block_t *)readblock(block); + + if ( blk == NULL) + return NULL; + if ( blk->hdr.magic != SNAP_MAGIC ) { + freeblock(blk); + return NULL; + } + + return blk; +} + +/* Copy the record named by @id into *target. + * Returns 0 on success, -1 if @id is NULL, its block is not a snap block, + * or its index fails the range check. */ +int snap_get_id(snap_id_t *id, snap_rec_t *target) +{ + snap_block_t *blk; + + if ( id == NULL ) + return -1; + + blk = snap_get_block(id->block); + + if ( blk == NULL ) + return -1; + + /* NOTE(review): index == nr_entries passes this check and reads a slot + * that has not been appended yet -- confirm whether '>=' was intended. */ + if ( id->index > blk->hdr.nr_entries ) { + freeblock(blk); + return -1; + } + + *target = blk->snaps[id->index]; + freeblock(blk); + return 0; +} + +/* Build an empty snap block whose header records @parent_id and @fork_id + * (null_snap_id for both when @parent_id is NULL) and allocate it in the + * block store; the new id is written to *new_id. + * Returns 0 on success, -1 on failure. */ +int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id, + snap_id_t *new_id) +{ + snap_rec_t parent_rec, fork_rec; + snap_block_t *blk, *pblk; + /* + if ( (parent_id != NULL) && (snap_get_id(parent_id, &parent_rec) != 0) ) + return -1; + + if ( (fork_id != NULL) && (snap_get_id(fork_id, &fork_rec) != 0) ) + return -1; +*/ + blk = (snap_block_t *)newblock(); + if ( blk == NULL ) + return -1; + + blk->hdr.magic = SNAP_MAGIC; + blk->hdr.nr_entries = 0; + blk->hdr.log_entries = 0; + blk->hdr.immutable = 0; + + if ( (parent_id != NULL) + && (parent_id->block != fork_id->block) + && (parent_id->block != 0)) { + + pblk = snap_get_block(parent_id->block); + if ( pblk == NULL ) { + freeblock(blk); + return -1; + } + blk->hdr.log_entries = pblk->hdr.log_entries; + freeblock(pblk); + } + + if (parent_id != NULL) { + blk->hdr.parent_block = *parent_id; + blk->hdr.fork_block = *fork_id; + } else { + blk->hdr.parent_block = null_snap_id; + blk->hdr.fork_block = null_snap_id; + } + + new_id->index = 0; + new_id->block = allocblock(blk); + freeblock(blk); /* the in-memory copy is no longer needed */ + if (new_id->block == 0) + return -1; + + return 0; +} + +/* Create an empty snap block parented (and forked) at @parent_id. */ +int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id) +{ + return __snap_block_create(parent_id, parent_id, new_id); +} + +/* Append *rec to the log block named by @old_id and report the next free + * slot through *new_id. Returns 0 on success, -1 on failure. */ +int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id) +{ + snap_id_t id = *old_id; + snap_block_t *blk = snap_get_block(id.block); + + if ( blk == NULL ) + return -1; + + if ( blk->hdr.immutable != 0 ) { + 
printf("Attempt to snap an immutable snap block!\n"); + freeblock(blk); /* do not leak the block on the error path */ + return -1; + } + + new_id->block = id.block; + + if (blk->hdr.nr_entries == SNAPS_PER_BLOCK) { + int ret; + + id.index--; /* make id point to the last full record */ + + ret = __snap_block_create(&id, &blk->hdr.fork_block, new_id); + if ( ret != 0 ) { + freeblock(blk); + return -1; + } + + blk->hdr.immutable = 1; + writeblock(id.block, blk); + freeblock(blk); + blk = snap_get_block(new_id->block); + if ( blk == NULL ) + return -1; + id = *new_id; + } + + blk->snaps[blk->hdr.nr_entries] = *rec; + blk->hdr.nr_entries++; + blk->hdr.log_entries++; + new_id->index = blk->hdr.nr_entries; + //printf("snap: %u %u\n", blk->hdr.nr_entries, blk->hdr.log_entries); + writeblock(id.block, blk); + freeblock(blk); + return 0; +} + +/* Print the records from @snap_id back to index 0 of its block, then do the + * same for each parent block, starting at the index stored in the parent id. */ +void snap_print_history(snap_id_t *snap_id) +{ + snap_id_t id = *snap_id; + unsigned int idx = id.index; + snap_block_t *new_blk, *blk = snap_get_block(id.block); + + while ( blk ) { + printf("[Snap block %Ld]:\n", id.block); + do { + printf(" %03u: root: %Ld ts: %ld.%ld\n", idx, + blk->snaps[idx].radix_root, + blk->snaps[idx].timestamp.tv_sec, + blk->snaps[idx].timestamp.tv_usec); + } while (idx-- != 0); + + id = blk->hdr.parent_block; + idx = id.index; /* idx wrapped past 0 above; restart at the parent record */ + /* a block id of 0 ends the chain; new_blk used to stay uninitialised here */ + new_blk = (id.block != 0) ? snap_get_block(id.block) : NULL; + freeblock(blk); + blk = new_blk; + } +} diff --git a/tools/blktap/snaplog.h b/tools/blktap/snaplog.h new file mode 100644 index 0000000000..785dccfc66 --- /dev/null +++ b/tools/blktap/snaplog.h @@ -0,0 +1,52 @@ +/************************************************************************** + * + * snaplog.h + * + * Snapshot log on-disk data structure. 
+ * + */ + +#include "blockstore.h" /* for BLOCK_SIZE */ + +typedef struct snap_id { + u64 block; + unsigned int index; +} snap_id_t; + +typedef struct snap_rec { + u64 radix_root; + struct timeval timestamp; +} snap_rec_t; + + +int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id); +int snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id); +void snap_print_history(snap_id_t *snap_id); +int snap_get_id(snap_id_t *id, snap_rec_t *target); + + +/* exported for vdi debugging */ +#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL + +static const snap_id_t null_snap_id = { 0, 0 }; + +typedef struct snap_block_hdr { + u64 magic; + snap_id_t parent_block; /* parent block within this chain */ + snap_id_t fork_block; /* where this log was forked */ + unsigned log_entries; /* total entries since forking */ + unsigned short nr_entries; /* entries in snaps[] */ + unsigned short immutable; /* has this snap page become immutable? */ +} snap_block_hdr_t; + + +#define SNAPS_PER_BLOCK \ + ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t)) + +typedef struct snap_block { + snap_block_hdr_t hdr; + snap_rec_t snaps[SNAPS_PER_BLOCK]; +} snap_block_t; + + +snap_block_t *snap_get_block(u64 block); diff --git a/tools/blktap/vdi.c b/tools/blktap/vdi.c new file mode 100644 index 0000000000..ea9ebd70da --- /dev/null +++ b/tools/blktap/vdi.c @@ -0,0 +1,353 @@ +/************************************************************************** + * + * vdi.c + * + * Virtual Disk Image (VDI) Interfaces + * + */ + +#include +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +#define VDI_REG_BLOCK 1LL +#define VDI_RADIX_ROOT writable(2) + +#if 1 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +/* I haven't decided about this registry stuff, so this is just a really + * quick lash-up so that there is some way to track VDIs. 
+ * + * (Most vdi access should be with a direct handle to the block, so this + * registry is just for start-of-day lookup and other control operations.) + */ + +vdi_registry_t *create_vdi_registry(void) +{ + vdi_registry_t *reg = (vdi_registry_t *)newblock(); + + if (reg == NULL) + return NULL; + + /* zero-fill the vdi radix root while we have an empty block. */ + writeblock(VDI_RADIX_ROOT, (void *)reg); + + + DPRINTF("[vdi.c] Creating VDI registry!\n"); + reg->magic = VDI_REG_MAGIC; + reg->nr_vdis = 0; + + writeblock(VDI_REG_BLOCK, (void *)reg); + + return reg; +} + +vdi_registry_t *get_vdi_registry(void) +{ + vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK); + + if ( vdi_reg == NULL ) + vdi_reg = create_vdi_registry(); + + if ( vdi_reg->magic != VDI_REG_MAGIC ) { + freeblock(vdi_reg); + return NULL; + } + + return vdi_reg; +} + +vdi_t *vdi_create(snap_id_t *parent_snap, char *name) +{ + int ret; + vdi_t *vdi; + vdi_registry_t *vdi_reg; + snap_rec_t snap_rec; + + /* create a vdi struct */ + vdi = newblock(); + if (vdi == NULL) + return NULL; + + if ( snap_get_id(parent_snap, &snap_rec) == 0 ) { + vdi->radix_root = snapshot(snap_rec.radix_root); + } else { + vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */ + vdi->radix_root = writable(vdi->radix_root); /* grr. */ + } + + /* create a snapshot log, and add it to the vdi struct */ + + ret = snap_block_create(parent_snap, &vdi->snap); + if ( ret != 0 ) { + DPRINTF("Error getting snap block in vdi_create.\n"); + freeblock(vdi); + return NULL; + } + + /* append the vdi to the registry, fill block and id. */ + /* implicit allocation means we have to write the vdi twice here. 
*/ + vdi_reg = get_vdi_registry(); + if ( vdi_reg == NULL ) { + freeblock(vdi); + return NULL; + } + + vdi->block = allocblock((void *)vdi); + vdi->id = vdi_reg->nr_vdis++; + strncpy(vdi->name, name, VDI_NAME_SZ); + vdi->name[VDI_NAME_SZ - 1] = '\0'; /* name[] has VDI_NAME_SZ bytes; index VDI_NAME_SZ is past the end */ + writeblock(vdi->block, (void *)vdi); + + update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block); + writeblock(VDI_REG_BLOCK, (void *)vdi_reg); + freeblock(vdi_reg); + + return vdi; +} + +/* Look @vdi_id up in the registry radix tree and read the vdi block it maps + * to; returns NULL when the id maps to 0. */ +vdi_t *vdi_get(u64 vdi_id) +{ + u64 vdi_blk; + vdi_t *vdi; + + vdi_blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id); + + if ( vdi_blk == 0 ) + return NULL; + + vdi = (vdi_t *)readblock(vdi_blk); + return vdi; +} + +/* Translate @vdi_block through the vdi's radix tree and return the id part of + * the entry; if @writable is non-NULL it receives the entry's writable bit. */ +u64 vdi_lookup_block(vdi_t *vdi, u64 vdi_block, int *writable) +{ + u64 gblock; + + gblock = lookup(VDI_HEIGHT, vdi->radix_root, vdi_block); + + if (writable != NULL) *writable = iswritable(gblock); +printf("lu: root: %11Ld, gblock: %11Ld, id: %11Ld, wr: %Ld\n", + vdi->radix_root, gblock, getid(gblock), iswritable(gblock)); + + return getid(gblock); +} + +/* Map @vdi_block to @g_block (marked writable), store the root returned by + * update() in the vdi and write the vdi block back. */ +void vdi_update_block(vdi_t *vdi, u64 vdi_block, u64 g_block) +{ + u64 id; + + /* updates are always writable. */ + id = writable(g_block); + + vdi->radix_root = update(VDI_HEIGHT, vdi->radix_root, vdi_block, id); + writeblock(vdi->block, vdi); +} + +/* Log the current radix root with a timestamp, switch the vdi to a new root + * obtained from snapshot(), and write the vdi block back. */ +void vdi_snapshot(vdi_t *vdi) +{ + snap_rec_t rec; + int ret; + + rec.radix_root = vdi->radix_root; + gettimeofday(&rec.timestamp, NULL); + + vdi->radix_root = snapshot(vdi->radix_root); + ret = snap_append(&vdi->snap, &rec, &vdi->snap); + if ( ret != 0 ) { + printf("snap_append returned failure\n"); + return; + } + writeblock(vdi->block, vdi); +} + +int __init_vdi() +{ + /* force the registry to be created if it doesn't exist. 
*/ + vdi_registry_t *vdi_reg = get_vdi_registry(); + if (vdi_reg == NULL) { + printf("[vdi.c] Couldn't get/create a VDI registry!\n"); + return -1; + } + freeblock(vdi_reg); + + return 0; +} + +#ifdef VDI_STANDALONE + +#define TEST_VDIS 50 +#define NR_ITERS 50000 +#define FORK_POINTS 200 +#define INIT_VDIS 3 +#define INIT_SNAPS 40 + +/* These must be of decreasing size: */ +#define NEW_FORK (RAND_MAX-(RAND_MAX/1000)) +#define NEW_ROOT_VDI (RAND_MAX-((RAND_MAX/1000)*2)) +#define NEW_FORK_VDI (RAND_MAX-((RAND_MAX/1000)*3)) + +#define GRAPH_DOT_FILE "vdi.dot" +#define GRAPH_PS_FILE "vdi.ps" + + +typedef struct sh_st { + snap_id_t id; + struct sh_st *next; +} sh_t; + +#define SNAP_HASHSZ 1024 +sh_t *node_hash[SNAP_HASHSZ]; +#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ) + +#define SNAPID_EQUAL(_a,_b) \ + (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index)) +/* Returns 1 if @id is already in node_hash; otherwise appends it to its + * bucket and returns 0. */ +int sh_check_and_add(snap_id_t *id) +{ + sh_t **s = &node_hash[SNAP_HASH(id)]; + + while (*s != NULL) { + if (SNAPID_EQUAL(&((*s)->id), id)) + return 1; + s = &(*s)->next; /* advance the cursor; assigning *s unlinked the chain */ + } + + *s = (sh_t *)malloc(sizeof(sh_t)); + (*s)->id = *id; + (*s)->next = NULL; + + return 0; +} + +int main(int argc, char *argv[]) +{ + vdi_t *vdi_list[TEST_VDIS]; + snap_id_t id, fork_points[FORK_POINTS]; + int nr_vdis = 0, nr_forks = 0; + int i, j, r; + FILE *f; + char name[VDI_NAME_SZ]; + + __init_blockstore(); + __init_vdi(); + + printf("[o] Generating seed VDIs. (%d VDIs)\n", INIT_VDIS); + + for (i=0; isnap; + nr_vdis++; + nr_forks++; + } + + printf("[o] Running a random workload. 
(%d iterations)\n", NR_ITERS); + + for (i=0; i NEW_FORK ) { + if ( nr_forks > FORK_POINTS ) + continue; + id = vdi_list[r%nr_vdis]->snap; + if ( ( id.block == 0 ) || ( id.index == 0 ) ) + continue; + id.index--; + fork_points[nr_forks++] = id; + + } else if ( r > NEW_ROOT_VDI ) { + + if ( nr_vdis == TEST_VDIS ) + continue; + + sprintf(name, "VDI Number %d.", nr_vdis); + vdi_list[nr_vdis++] = vdi_create(NULL, name); + + } else if ( r > NEW_FORK_VDI ) { + + if ( nr_vdis == TEST_VDIS ) + continue; + + sprintf(name, "VDI Number %d.", nr_vdis); + vdi_list[nr_vdis++] = vdi_create(&fork_points[r%nr_forks], name); + + } else /* SNAPSHOT */ { + + vdi_snapshot(vdi_list[r%nr_vdis]); + + } + } + + /* now dump it out to a dot file. */ + printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis); + + f = fopen(GRAPH_DOT_FILE, "w"); + + /* write graph preamble */ + fprintf(f, "digraph G {\n"); + fprintf(f, " rankdir=LR\n"); + + for (i=0; isnap; + int nr_snaps, done=0; + + /* add a node for the id */ +printf("vdi: %d\n", i); + fprintf(f, " n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", + id.block, id.index, vdi_list[i]->name, + id.block, id.index); + sprintf(oldnode, "n%Ld%d", id.block, id.index); + + while (id.block != 0) { + blk = snap_get_block(id.block); + nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index); + id = blk->hdr.fork_block; + + done = sh_check_and_add(&id); + + /* add a node for the fork_id */ + if (!done) { + fprintf(f, " n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", + id.block, id.index, + id.block, id.index); + } + + /* add an edge between them */ + fprintf(f, " n%Ld%d -> %s [label=\"%u snapshots\"]\n", + id.block, id.index, oldnode, nr_snaps); + sprintf(oldnode, "n%Ld%d", id.block, id.index); + freeblock(blk); + + if (done) break; + } + } + + /* write graph postamble */ + fprintf(f, "}\n"); + fclose(f); + + printf("[o] Generating postscript graph. 
(%s)\n", GRAPH_PS_FILE); + { + char cmd[255]; + sprintf(cmd, "dot %s -Tps -o %s", GRAPH_DOT_FILE, GRAPH_PS_FILE); + system(cmd); + } + return 0; +} + +#endif diff --git a/tools/blktap/vdi.h b/tools/blktap/vdi.h new file mode 100644 index 0000000000..dd32102dad --- /dev/null +++ b/tools/blktap/vdi.h @@ -0,0 +1,48 @@ +/************************************************************************** + * + * vdi.h + * + * Virtual Disk Image (VDI) Interfaces + * + */ + +#ifndef __VDI_H__ +#define __VDI_H__ + +#include "blktaplib.h" +#include "snaplog.h" + +#define VDI_HEIGHT 35 +#define VDI_REG_HEIGHT 35 /* why not? */ + +#define VDI_NAME_SZ 256 + +typedef struct vdi { + u64 id; /* unique vdi id -- used by the registry */ + u64 block; /* block where this vdi lives (also unique)*/ + u64 radix_root; /* radix root node for block mappings */ + snap_id_t snap; /* next snapshot slot for this VDI */ + struct vdi *next; /* used to hash-chain in blkif. */ + blkif_vdev_t vdevice; /* currently mounted as... */ + char name[VDI_NAME_SZ];/* human readable vdi name */ +} vdi_t; + +#define VDI_REG_MAGIC 0xff00ff0bb0ff00ffLL + +typedef struct vdi_registry { + u64 magic; + u64 nr_vdis; +} vdi_registry_t; + + +int __init_vdi(void); + +vdi_t *vdi_get(u64 vdi_id); +vdi_registry_t *get_vdi_registry(void); +vdi_t *vdi_create(snap_id_t *parent_snap, char *name); +u64 vdi_lookup_block(vdi_t *vdi, u64 vdi_block, int *writable); +void vdi_update_block(vdi_t *vdi, u64 vdi_block, u64 g_block); +void vdi_snapshot(vdi_t *vdi); + + +#endif /* __VDI_H__ */ diff --git a/tools/blktap/vdi_create.c b/tools/blktap/vdi_create.c new file mode 100644 index 0000000000..1d40b86c24 --- /dev/null +++ b/tools/blktap/vdi_create.c @@ -0,0 +1,52 @@ +/************************************************************************** + * + * vdi_create.c + * + * Create a new vdi. 
+ * + */ + +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +/* Create a VDI named argv[1]; when a snapshot block and index are also given + * (argv[2], argv[3]) the new VDI is created from that snapshot. */ +int main(int argc, char *argv[]) +{ + vdi_t *vdi; + char name[VDI_NAME_SZ] = ""; + snap_id_t id; + int from_snap = 0; + + __init_blockstore(); + __init_vdi(); + + if ( argc == 1 ) { + printf("usage: %s [ ]\n", argv[0]); + exit(-1); + } + + strncpy( name, argv[1], VDI_NAME_SZ); + name[VDI_NAME_SZ - 1] = '\0'; /* last valid index; VDI_NAME_SZ wrote past the array */ + + if ( argc > 3 ) { + id.block = (u64) atoll(argv[2]); + id.index = (unsigned int) atol (argv[3]); + from_snap = 1; + } + + vdi = vdi_create( from_snap ? &id : NULL, name); + + if ( vdi == NULL ) { + printf("Failed to create VDI!\n"); + freeblock(vdi); + exit(-1); + } + + freeblock(vdi); + + return (0); +} diff --git a/tools/blktap/vdi_fill.c b/tools/blktap/vdi_fill.c new file mode 100644 index 0000000000..7e3eacc3f3 --- /dev/null +++ b/tools/blktap/vdi_fill.c @@ -0,0 +1,82 @@ +/************************************************************************** + * + * vdi_fill.c + * + * Hoover a file or device into a vdi. + * You must first create the vdi with vdi_create. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +int main(int argc, char *argv[]) +{ + vdi_t *vdi; + u64 id; + int fd; + struct stat st; + u64 tot_size; + char spage[BLOCK_SIZE]; + char *dpage; + u64 vblock = 0, count=0; + + __init_blockstore(); + __init_vdi(); + + if ( argc < 3 ) { + printf("usage: %s \n", argv[0]); + exit(-1); + } + + id = (u64) atoll(argv[1]); + + vdi = vdi_get( id ); + + if ( vdi == NULL ) { + printf("Failed to retreive VDI %Ld!\n", id); + exit(-1); + } + + fd = open(argv[2], O_RDONLY | O_LARGEFILE); + + if (fd < 0) { + printf("Couldn't open %s!\n", argv[2]); + exit(-1); + } + + if ( fstat(fd, &st) != 0 ) { + printf("Couldn't stat %s!\n", argv[2]); + exit(-1); + } + + tot_size = (u64) st.st_size; + printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size); + + printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE); + printf(" "); + while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) { + u64 gblock = 0; + + gblock = allocblock(spage); + vdi_update_block(vdi, vblock, gblock); + + vblock++; + if ((vblock % 512) == 0) + printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock); + fflush(stdout); + } + printf("\n"); + + freeblock(vdi); + + return (0); +} diff --git a/tools/blktap/vdi_list.c b/tools/blktap/vdi_list.c new file mode 100644 index 0000000000..c08abe27b6 --- /dev/null +++ b/tools/blktap/vdi_list.c @@ -0,0 +1,47 @@ +/************************************************************************** + * + * vdi_list.c + * + * Print a list of VDIs on the block store. 
+ * + */ + +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +int main(int argc, char *argv[]) +{ + vdi_registry_t *reg; + vdi_t *vdi; + int i; + + __init_blockstore(); + __init_vdi(); + + reg = get_vdi_registry(); + + if ( reg == NULL ) { + printf("couldn't get VDI registry.\n"); + exit(-1); + } + + for (i=0; i < reg->nr_vdis; i++) { + vdi = vdi_get(i); + + if ( vdi != NULL ) { + + printf("%10Ld %60s\n", vdi->id, vdi->name); + freeblock(vdi); + + } + } + + freeblock(reg); + + return 0; +} diff --git a/tools/blktap/vdi_snap.c b/tools/blktap/vdi_snap.c new file mode 100644 index 0000000000..3b09898dc6 --- /dev/null +++ b/tools/blktap/vdi_snap.c @@ -0,0 +1,43 @@ +/************************************************************************** + * + * vdi_snap.c + * + * Snapshot a vdi. + * + */ + +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +int main(int argc, char *argv[]) +{ + vdi_t *vdi; + u64 id; + + __init_blockstore(); + __init_vdi(); + + if ( argc == 1 ) { + printf("usage: %s \n", argv[0]); + exit(-1); + } + + id = (u64) atoll(argv[1]); + + vdi = vdi_get(id); + + if ( vdi == NULL ) { + printf("couldn't find the requested VDI.\n"); + freeblock(vdi); + exit(-1); + } + + vdi_snapshot(vdi); + + return 0; +} diff --git a/tools/blktap/vdi_snap_list.c b/tools/blktap/vdi_snap_list.c new file mode 100644 index 0000000000..0e37e9497a --- /dev/null +++ b/tools/blktap/vdi_snap_list.c @@ -0,0 +1,79 @@ +/************************************************************************** + * + * vdi_snap_list.c + * + * Print a list of snapshots for the specified vdi. 
+ * + */ + +#include +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +int main(int argc, char *argv[]) +{ + vdi_t *vdi; + u64 id; + int i, max_snaps = -1; + snap_block_t *blk; + snap_id_t sid; + char *t; + + __init_blockstore(); + __init_vdi(); + + if ( argc == 1 ) { + printf("usage: %s [max snaps]\n", argv[0]); + exit(-1); + } + + id = (u64) atoll(argv[1]); + + if ( argc > 2 ) { + max_snaps = atoi(argv[2]); + } + + vdi = vdi_get(id); + + if ( vdi == NULL ) { + printf("couldn't find the requested VDI.\n"); + freeblock(vdi); + exit(-1); + } + + sid = vdi->snap; + sid.index--; + + //printf("%6s%4s%21s %12s\n", "Block", "idx", "timestamp", "radix root"); + printf("%6s%4s%37s %12s\n", "Block", "idx", "timestamp", "radix root"); + + while (sid.block != 0) { + blk = snap_get_block(sid.block); + for (i = sid.index; i >= 0; i--) { + if ( max_snaps == 0 ) { + freeblock(blk); + goto done; + } + t = ctime(&blk->snaps[i].timestamp.tv_sec); + t[strlen(t)-1] = '\0'; + //printf("%6Ld%4u%14lu.%06lu %12Ld\n", + printf("%6Ld%4u%30s %06lu %12Ld\n", + sid.block, i, + //blk->snaps[i].timestamp.tv_sec, + t, + blk->snaps[i].timestamp.tv_usec, + blk->snaps[i].radix_root); + if ( max_snaps != -1 ) + max_snaps--; + } + sid = blk->hdr.parent_block; + freeblock(blk); + } +done: + return 0; +} diff --git a/tools/blktap/vdi_tree.c b/tools/blktap/vdi_tree.c new file mode 100644 index 0000000000..d43abccef4 --- /dev/null +++ b/tools/blktap/vdi_tree.c @@ -0,0 +1,132 @@ +/************************************************************************** + * + * vdi_tree.c + * + * Output current vdi tree to dot and postscript. 
+ * + */ + +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +#define GRAPH_DOT_FILE "vdi.dot" +#define GRAPH_PS_FILE "vdi.ps" + +typedef struct sh_st { + snap_id_t id; + struct sh_st *next; +} sh_t; + +#define SNAP_HASHSZ 1024 +sh_t *node_hash[SNAP_HASHSZ]; +#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ) + +#define SNAPID_EQUAL(_a,_b) \ + (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index)) +/* Returns 1 if @id is already in node_hash; otherwise appends it to its + * bucket and returns 0. */ +int sh_check_and_add(snap_id_t *id) +{ + sh_t **s = &node_hash[SNAP_HASH(id)]; + + while (*s != NULL) { + if (SNAPID_EQUAL(&((*s)->id), id)) + return 1; + s = &(*s)->next; /* advance the cursor; assigning *s unlinked the chain */ + } + + *s = (sh_t *)malloc(sizeof(sh_t)); + (*s)->id = *id; + (*s)->next = NULL; + + return 0; +} + +int main(int argc, char *argv[]) +{ + FILE *f; + char dot_file[255] = GRAPH_DOT_FILE; + char ps_file[255] = GRAPH_PS_FILE; + int nr_vdis = 0, nr_forks = 0; + vdi_registry_t *reg; + vdi_t *vdi; + int i; + + __init_blockstore(); + __init_vdi(); + + reg = get_vdi_registry(); + + if ( reg == NULL ) { + printf("couldn't get VDI registry.\n"); + exit(-1); + } + + if ( argc > 1 ) { + /* leave room for the terminator: ps_file[255] is past the array */ + strncpy(ps_file, argv[1], sizeof(ps_file) - 1); + ps_file[sizeof(ps_file) - 1] = '\0'; + } + + /* now dump it out to a dot file. */ + printf("[o] Dumping state to a dot graph. 
(%d VDIs)\n", nr_vdis); + + f = fopen(dot_file, "w"); + + /* write graph preamble */ + fprintf(f, "digraph G {\n"); + fprintf(f, " rankdir=LR\n"); + + for (i=0; inr_vdis; i++) { + char oldnode[255]; + snap_block_t *blk; + snap_id_t id; + int nr_snaps, done=0; + + vdi = vdi_get(i); + id = vdi->snap; + /* add a node for the id */ +printf("vdi: %d\n", i); + fprintf(f, " n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", + id.block, id.index, vdi->name, + id.block, id.index); + sprintf(oldnode, "n%Ld%d", id.block, id.index); + + while (id.block != 0) { + blk = snap_get_block(id.block); + nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index); + id = blk->hdr.fork_block; + + done = sh_check_and_add(&id); + + /* add a node for the fork_id */ + if (!done) { + fprintf(f, " n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", + id.block, id.index, + id.block, id.index); + } + + /* add an edge between them */ + fprintf(f, " n%Ld%d -> %s [label=\"%u snapshots\"]\n", + id.block, id.index, oldnode, nr_snaps); + sprintf(oldnode, "n%Ld%d", id.block, id.index); + freeblock(blk); + + if (done) break; + } + } + + /* write graph postamble */ + fprintf(f, "}\n"); + fclose(f); + + printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE); + { + char cmd[255]; + sprintf(cmd, "dot %s -Tps -o %s", dot_file, ps_file); + system(cmd); + } + return 0; +} diff --git a/tools/blktap/vdi_validate.c b/tools/blktap/vdi_validate.c new file mode 100644 index 0000000000..a2468615a0 --- /dev/null +++ b/tools/blktap/vdi_validate.c @@ -0,0 +1,96 @@ +/************************************************************************** + * + * vdi_validate.c + * + * Intended to sanity-check vm_fill and the underlying vdi code. + * + * Block-by-block compare of a vdi with a file/device on the disk. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "blockstore.h" +#include "radix.h" +#include "vdi.h" + +int main(int argc, char *argv[]) +{ + vdi_t *vdi; + u64 id; + int fd; + struct stat st; + u64 tot_size; + char spage[BLOCK_SIZE], *dpage; + char *vpage; + u64 vblock = 0, count=0; + + __init_blockstore(); + __init_vdi(); + + if ( argc < 3 ) { + printf("usage: %s \n", argv[0]); + exit(-1); + } + + id = (u64) atoll(argv[1]); + + vdi = vdi_get( id ); + + if ( vdi == NULL ) { + printf("Failed to retreive VDI %Ld!\n", id); + exit(-1); + } + + fd = open(argv[2], O_RDONLY | O_LARGEFILE); + + if (fd < 0) { + printf("Couldn't open %s!\n", argv[2]); + exit(-1); + } + + if ( fstat(fd, &st) != 0 ) { + printf("Couldn't stat %s!\n", argv[2]); + exit(-1); + } + + tot_size = (u64) st.st_size; + printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size); + + printf(" "); + while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) { + u64 gblock = 0; + + gblock = vdi_lookup_block(vdi, vblock, NULL); + + if (gblock == 0) { + printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock); + exit(0); + } + + dpage = readblock(gblock); + + if (memcmp(spage, dpage, BLOCK_SIZE) != 0) { + printf("\n\nblocks don't match! (%Ld)\n", vblock); + exit(0); + } + + freeblock(dpage); + + vblock++; + printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock); + fflush(stdout); + } + printf("\n"); + + printf("VDI %Ld looks good!\n", id); + + freeblock(vdi); + + return (0); +} diff --git a/tools/python/xen/xend/server/blkif.py b/tools/python/xen/xend/server/blkif.py index f40f7d5b6b..00cd9216d5 100755 --- a/tools/python/xen/xend/server/blkif.py +++ b/tools/python/xen/xend/server/blkif.py @@ -369,7 +369,7 @@ class BlkDev(controller.SplitDev): # Add a new disk type that will just pass an opaque id in the # start_sector and use an experimental device type. # Please contact andrew.warfield@cl.cam.ac.uk with any concerns. 
- if self.type == 'amorfs': + if self.type == 'parallax': self.node = node self.device = 61440 # (240,0) self.start_sector = long(self.params)